Syntax.java example

Explorer

damp.ekeko.snippets-master
- damp.ekeko.snippets.plugin
  - src
    - damp
      - ekeko
        snippets
        BoundDirective.java
        DirectiveOperandBinding.java
        EkekoSnippetsPlugin.java
        ExtractedSnippet.java
        NaiveASTFlattener.java
        OperatorOperandBinding.java
        SnippetBaseListener.java
        SnippetBaseVisitor.java
        SnippetExtractor.java
        SnippetLexer.java
        SnippetListener.java
        SnippetParser.java
        SnippetVisitor.java
        data
        SnippetOperator.java
        TemplateGroup.java
        geneticsearch
        PartialJavaProjectModel.java
        gui
        BoundDirectivesEditorDialog.java
        BoundDirectivesViewer.java
        ChartCanvas.java
        ClojureFileEditorInput.java
        DirectiveOperandBindingEditingSupport.java
        DirectiveOperandBindingLabelProviderValue.java
        DirectiveSelectionDialog.java
        IntendedResultsEditor.java
        IntendedResultsEditorCommandHandler.java
        IntendedResultsEditorInput.java
        IntendedResultsEditorPersistableElementFactory.java
        MutationHistoryDialog.java
        OperandBindingLabelProviderDescription.java
        OperatorOperandBindingEditingSupport.java
        OperatorOperandBindingLabelProviderValue.java
        OperatorOperandsView.java
        OperatorOperandsViewer.java
        OperatorTreeContentProvider.java
        OperatorTreeLabelProvider.java
        PopulationInspectorDialog.java
        QueryInspectorDialog.java
        RecommendationEditor.java
        RecommendationEditorCommandHandler.java
        RecommendationEditorInput.java
        RecommendationEditorPersistableElementFactory.java
        RewritesTemplateEditor.java
        SubjectsTemplateEditor.java
        TemplateCodeGenerator.java
        TemplateEditor.java
        TemplateEditorActionBarContributor.java
        TemplateEditorCommandHandler.java
        TemplateEditorInput.java
        TemplateEditorPersistableElementFactory.java
        TemplateGroupNodeSelectionDialog.java
        TemplateGroupTemplateElement.java
        TemplateGroupViewer.java
        TemplateGroupViewerNodeDoubleClickListener.java
        TemplateGroupViewerNodeSelectionEvent.java
        TemplateGroupViewerNodeSelectionListener.java
        TemplatePrettyPrinter.java
        TemplateTreeContentProvider.java
        TemplateTreeLabelProviders.java
        TransformationEditor.java
        TransformationEditorActionBarContributor.java
        TransformationEditorCommandHandler.java
        TransformationEditorInput.java
        TransformationEditorPersistableElementFactory.java
        TransformationOverviewEditor.java
    - ec
      - util
        MersenneTwister.java
- damp.ekeko.snippets.plugin.test
  - resources
  - src
    - test
      - damp
        ekeko
        snippets
        EkekoSnippetsTest.java
        experiments
        GeneticSearchTest.java

/*
 *                 Sun Public License Notice
 * 
 * The contents of this file are subject to the Sun Public License
 * Version 1.0 (the "License"). You may not use this file except in
 * compliance with the License. A copy of the License is available at
 * http://www.sun.com/
 * 
 * The Original Code is Forte for Java, Community Edition. The Initial
 * Developer of the Original Code is Sun Microsystems, Inc. Portions
 * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved.
 */

package org.netbeans.editor;

/**
* Lexical analyzer that works on a given text buffer. It allows
* a given character buffer to be parsed sequentially by calling
* <tt>nextToken()</tt>, which returns the token-ids. Token-ids are
* integer constants greater than or equal to zero. They are usually
* sequential but they don't have to be. The highest token-id
* must be assigned to the <tt>highestTokenID</tt> variable
* in the constructor of the given analyzer. The token-id numbers can be
* translated to the meaningful names by calling <tt>getTokenName()</tt>.
*
* After the token is found by calling the <tt>nextToken</tt> method,
* the <tt>getTokenOffset()</tt> method can be used
* to get the starting offset of the current
* token in the buffer. The <tt>getTokenLength()</tt> gives the length
* of the current token.
*
* The heart of the analyzer is the <tt>parseToken()</tt> method which
* parses the text and returns the token-id of the last token found.
* The <tt>parseToken()</tt> method is called from the <tt>nextToken()</tt>.
* It operates with two important variables. The <tt>offset</tt>
* variable identifies the currently scanned character in the buffer.
* The <tt>tokenOffset</tt> is the beginning of the current token.
* The <tt>state</tt> variable that identifies the current internal
* state of the analyzer is set accordingly when the characters are parsed.
* If the <tt>parseToken()</tt> recognizes a token, it returns its ID
* and the <tt>tokenOffset</tt> is its beginning in the buffer and
* <tt>offset - tokenOffset</tt> is its length. When the token is processed
* the value of <tt>tokenOffset</tt> is set to be the same as current
* value of the <tt>offset</tt> and the parsing continues.
*
* Internal states are the integer constants used internally by analyzer.
* They are assigned to the <tt>state</tt> variable to express
* that the analyzer has moved from one state to another.
* They are usually numbered starting from zero but they don't
* have to. The only reserved value is -1 which is reserved
* for the INIT state - the initial internal state of the analyzer.
*
* There is also the support for defining the persistent info about
* the current state of the analyzer. This info can be later used
* to restore the parsing from some particular state instead of
* parsing from the beginning of the buffer. This feature is very
* useful if there are the modifications performed in the document.
* The info is stored in the <tt>StateInfo</tt> interface
* with the <tt>BaseStateInfo</tt> as the basic implementation.
* It enables to get and set the two important values
* from the persistent point of view.
* The first one is the value of the <tt>state</tt> variable.
* The other one is the difference <tt>offset - tokenOffset</tt>
* which is called pre-scan. The particular analyzer can define
* additional values important for the persistent storage.
* The <tt>createStateInfo()</tt> can be overridden to create
* custom state-info and <tt>loadState()</tt> and <tt>storeState()</tt>
* can be overridden to get/set the additional values.
*
* The <tt>load()</tt> method sets the buffer to be parsed.
*
*
* @author Miloslav Metelka
* @version 1.00
*/

public class Syntax {

    /** Result of <tt>compareState()</tt>: the analyzer matches the given state info. */
    public static final int EQUAL_STATE = 0;

    /** Result of <tt>compareState()</tt>: the analyzer differs from the given
    * state info, or no state info was given at all.
    */
    public static final int DIFFERENT_STATE = 1;


    /** Initial internal state of the analyzer. */
    public static final int INIT = -1;


    /** Special token ID marking an invalid token. */
    public static final int INVALID = -3;

    /** Special token ID reporting that the end of the scanned text buffer was reached. */
    public static final int EOT = -2;

    /** Special token ID reporting that an end of line was found. */
    public static final int EOL = -1;


    /** Name of the INVALID token ID. */
    public static final String TN_INVALID = "INVALID"; // NOI18N

    /** Name of the EOL token ID. */
    public static final String TN_EOL = "EOL"; // NOI18N

    /** Name of the EOT token ID. */
    public static final String TN_EOT = "EOT"; // NOI18N


    // Names for the most common kinds of tokens follow.
    /** Token name for plain text. */
    public static final String TN_TEXT = "text"; // NOI18N

    /** Token name for erroneous text. */
    public static final String TN_ERROR = "error"; // NOI18N

    /** Token name for a keyword. */
    public static final String TN_KEYWORD = "keyword"; // NOI18N

    /** Token name for an identifier. */
    public static final String TN_IDENTIFIER = "identifier"; // NOI18N

    /** Token name for a function call. */
    public static final String TN_FUNCTION = "function"; // NOI18N

    /** Token name for an operator. */
    public static final String TN_OPERATOR = "operator"; // NOI18N

    /** Token name for a line comment. */
    public static final String TN_LINE_COMMENT = "line-comment"; // NOI18N

    /** Token name for a block comment. */
    public static final String TN_BLOCK_COMMENT = "block-comment"; // NOI18N

    /** Token name for a character constant. */
    public static final String TN_CHAR = "char"; // NOI18N

    /** Token name for a string constant. */
    public static final String TN_STRING = "string"; // NOI18N

    /** Token name for an integer constant. */
    public static final String TN_INT = "int"; // NOI18N

    /** Token name for a hexadecimal constant. */
    public static final String TN_HEX = "hex"; // NOI18N

    /** Token name for an octal constant. */
    public static final String TN_OCTAL = "octal"; // NOI18N

    /** Token name for a long constant. */
    public static final String TN_LONG = "long"; // NOI18N

    /** Token name for a float constant. */
    public static final String TN_FLOAT = "float"; // NOI18N

    /** Token name for a double constant. */
    public static final String TN_DOUBLE = "double"; // NOI18N



    /** Current internal state of the analyzer. It starts as INIT and
    * is meant to be updated by <tt>parseToken()</tt> while the
    * characters are being processed.
    */
    protected int state = INIT;

    /** The character buffer being scanned. */
    protected char[] buffer;

    /** Offset of the currently scanned character in the buffer. */
    protected int offset;

    /** Offset in the buffer where the current token starts. */
    protected int tokenOffset;

    /** Additional information about the token parsed, for example
    * the concrete type of a keyword or an operator. This base class
    * never assigns it; it only exposes it through <tt>getHelperID()</tt>.
    */
    protected int helperID;

    /** Reserved for future use; not read or written by this class. */
    protected boolean lightError;

    /** Length of the token found by the last <tt>nextToken()</tt> call. */
    protected int tokenLength;

    /** When true, no further buffers will follow, so the analyzer
    * should return all the tokens, including those whose scanning
    * would otherwise be postponed until the next buffer is available.
    */
    protected boolean lastBuffer;

    /** Offset in the buffer at which the scanning should stop. */
    protected int stopOffset;

    /** Highest token ID used by the syntax, or -1 when the syntax
    * defines no tokens. It is returned by <tt>getHighestTokenID()</tt>
    * and should be assigned in the constructor.
    */
    protected int highestTokenID = -1; // this base class defines no tokens


    /** Entry point for scanning the text from the outside.
    * It delegates the recognition to <tt>parseToken()</tt> and keeps
    * <tt>tokenOffset</tt> and <tt>tokenLength</tt> up to date.
    * It can be extended to add any custom debugging.
    * @return EOT when the scanning offset already reached the stop offset,
    *   otherwise the ID of the next non-empty token or whatever special ID
    *   below EOL <tt>parseToken()</tt> reported.
    */
    public int nextToken() {
        // Nothing left to scan in the current buffer
        if (offset >= stopOffset) {
            tokenLength = 0;
            return EOT;
        }

        int id = parseToken();
        if (id < EOL) { // EOT (or another special ID): no token to measure
            tokenLength = 0;
            return id;
        }

        // Regular token (or EOL): measure it and start the next token here
        tokenLength = offset - tokenOffset;
        tokenOffset = offset;

        // Empty tokens are skipped by asking again; the call is deliberately
        // dispatched through nextToken() so that extended versions are re-entered.
        return (tokenLength != 0) ? id : nextToken();
    }

    /** Core function of the analyzer. It returns one of the following:
    * a) ID of the next token in the scanned text
    * b) EOL when the end of line was found in the scanned buffer
    * c) EOT when no more characters are available in the scanned buffer.
    *
    * An implementation scans the active character and performs one or more
    * of the following actions:
    * 1. changes the internal analyzer state (state = new-state)
    * 2. returns a token ID (return token-ID)
    * 3. adjusts the current position to signal a different end of token;
    *    the character that offset points to is not included in the token
    *
    * This base implementation recognizes nothing and always returns EOT.
    */
    protected int parseToken() {
        return EOT;
    }

    /** Prepare the analyzer for scanning the given area of a buffer.
    * The offsets are set first and then either the given state info
    * is loaded or the analyzer is put into its initial state.
    * @param stateInfo state to start from. It can be null, in which case
    *   <tt>loadInitState()</tt> is used.
    * @param buffer buffer that will be scanned
    * @param offset offset of the first character that will be scanned
    * @param len length of the area to be scanned
    * @param lastBuffer whether this is the last buffer in the document. All the tokens
    *   will be returned including the last possibly incomplete one.
    */
    public void load(StateInfo stateInfo, char[] buffer, int offset, int len, boolean lastBuffer) {
        this.buffer = buffer;
        this.lastBuffer = lastBuffer;
        this.stopOffset = offset + len;
        this.tokenOffset = offset;
        this.offset = offset;

        // Offsets are in place before the state gets a chance to adjust them
        if (stateInfo == null) {
            loadInitState();
        } else {
            loadState(stateInfo);
        }
    }

    /** Continue the scanning in another buffer, typically after EOT
    * was reported. The difference between the requested offset and the current
    * one is applied to both the scanning offset and the token offset, so the
    * current preScan is preserved. If the preScan is non-zero, the caller
    * is responsible for providing all the preScan characters in the new buffer.
    * @param buffer next buffer where the scan will continue.
    * @param offset offset where the scan will continue.
    *   It's not decremented by the current preScan.
    * @param len length of the area to be scanned.
    *   It's not extended by the current preScan.
    * @param lastBuffer whether this is the last buffer in the document. All the tokens
    *   will be returned including the last possibly incomplete one.
    */
    public void relocate(char[] buffer, int offset, int len, boolean lastBuffer) {
        int shift = offset - this.offset; // distance from the current scanning offset

        this.tokenOffset += shift;
        this.offset = offset; // same as moving the current offset by the shift
        this.stopOffset = offset + len;

        this.buffer = buffer;
        this.lastBuffer = lastBuffer;
    }

    /** Tell the analyzer whether the current buffer is the last one. */
    public void setLastBuffer(boolean lastBuffer) {
        this.lastBuffer = lastBuffer;
    }

    /** Set the offset in the buffer where the scanning should stop.
    * It forces the analyzer to stop explicitly at some
    * offset in the buffer. It's used for example when the document
    * is read initially.
    */
    public void setStopOffset(int stopOffset) {
        this.stopOffset = stopOffset;
    }

    /** Get the buffer that is currently scanned. */
    public final char[] getBuffer() {
        return buffer;
    }

    /** Get the current scanning offset. */
    public final int getOffset() {
        return offset;
    }

    /** Get the start of the last token in the scanned buffer. It is computed
    * as the current scanning offset minus the length of the last token.
    */
    public final int getTokenOffset() {
        return offset - tokenLength;
    }

    /** Get the length of the last token in the scanned buffer. */
    public final int getTokenLength() {
        return tokenLength;
    }

    /** Translate the token ID with respect to a specific syntax class.
    * This method becomes handy when the syntax is composed
    * from several other syntaxes.
    * @return the token ID unchanged when no class is given or when the class
    *   is exactly the class of this syntax, INVALID otherwise.
    */
    public int translateTokenID(int tokenID, Class syntaxClass) {
        boolean ownToken = (syntaxClass == null) || (getClass() == syntaxClass);
        return ownToken ? tokenID : INVALID;
    }

    /** Get the highest token ID. This method can be redefined although
    * usually it's enough to assign the <tt>highestTokenID</tt> variable
    * in the syntax constructor.
    */
    public int getHighestTokenID() {
        return highestTokenID;
    }

    /** Get the token helper ID which, if filled by the analyzer,
    * holds additional information about the token parsed.
    */
    public final int getHelperID() {
        return helperID;
    }

    /** Get the pre-scan, i.e. the number of characters
    * between tokenOffset and offset.
    * When there are no more characters in the current buffer
    * the analyzer returns EOT, yet it may already have parsed
    * some characters at the end of the buffer that belong to
    * a token which is still incomplete and cannot be returned yet.
    * The pre-scan tells how many characters from the end of
    * the current buffer should be present at the beginning
    * of the next buffer so that the incomplete token can be
    * returned as the first token when parsing the next buffer.
    */
    public int getPreScan() {
        return offset - tokenOffset;
    }

    /** Put the analyzer into its initial state. It is used when scanning
    * from the beginning of the document, when there is no stored state
    * to start from, or to explicitly reset the analyzer.
    * The offsets must not be touched by this method.
    */
    public void loadInitState() {
        state = INIT;
    }

    /** Zero all the offsets and the token length and return
    * to the initial state through <tt>loadInitState()</tt>.
    */
    public void reset() {
        offset = 0;
        tokenOffset = 0;
        stopOffset = 0;
        tokenLength = 0;
        loadInitState();
    }

    /** Load a valid state info into the analyzer. The offsets
    * are already initialized when this method is called. The state
    * is taken over from the state info and tokenOffset is decreased
    * by the preScan stored there.
    * @param stateInfo state info to be loaded into the syntax. It must be non-null.
    */
    public void loadState(StateInfo stateInfo) {
        state = stateInfo.getState();
        tokenOffset = tokenOffset - stateInfo.getPreScan();
    }

    /** Store the state and the pre-scan of this analyzer into the given state info. */
    public void storeState(StateInfo stateInfo) {
        stateInfo.setState(state);
        stateInfo.setPreScan(getPreScan());
    }

    /** Compare the state of this analyzer to the given state info.
    * @return EQUAL_STATE when both the state and the preScan match,
    *   DIFFERENT_STATE otherwise, including for a null state info.
    */
    public int compareState(StateInfo stateInfo) {
        if (stateInfo == null) {
            return DIFFERENT_STATE;
        }
        if (stateInfo.getState() != state) {
            return DIFFERENT_STATE;
        }
        if (stateInfo.getPreScan() != getPreScan()) {
            return DIFFERENT_STATE;
        }
        return EQUAL_STATE;
    }

    /** Create the state info appropriate for the particular analyzer. */
    public StateInfo createStateInfo() {
        return new BaseStateInfo();
    }

    /** Get the name of the token for the given token ID. This method
    * is used for finding the proper coloring and for debugging too.
    * This base class knows only the special token IDs.
    */
    public String getTokenName(int tokenID) {
        if (tokenID == EOL) {
            return TN_EOL;
        }
        if (tokenID == EOT) {
            return TN_EOT;
        }
        if (tokenID == INVALID) {
            return TN_INVALID;
        }
        // not one of the special token IDs
        return "Unknown token ID " + tokenID; // NOI18N
    }

    /** Get the name of the state as a string. It can be used for debugging
    * by the developer of a new syntax analyzer. The recognized states can
    * include all the constants used in the analyzer so that the method can
    * be used everywhere to convert numbers to more practical strings.
    * This base class knows only the INIT state.
    */
    public String getStateName(int stateNumber) {
        if (stateNumber == INIT) {
            return "INIT"; // NOI18N
        }
        return "Unknown state " + stateNumber; // NOI18N
    }

    /** Syntax information as String */
    public String toString() {
        StringBuffer text = new StringBuffer();
        text.append("tokenOffset=").append(tokenOffset); // NOI18N
        text.append(", offset=").append(offset); // NOI18N
        text.append(", state=").append(getStateName(state)); // NOI18N
        text.append(", stopOffset=").append(stopOffset); // NOI18N
        text.append(", lastBuffer=").append(lastBuffer); // NOI18N
        return text.toString();
    }


    /** Holder of the two basic pieces of information about the state
    * of the whole lexical analyzer - its internal state and its preScan.
    */
    public interface StateInfo {

        /** Get the internal state */
        int getState();

        /** Store the internal state */
        void setState(int state);

        /** Get the preScan value */
        int getPreScan();

        /** Store the preScan value */
        void setPreScan(int preScan);

    }


    /** Basic implementation of the StateInfo interface keeping both values in fields. */
    public static class BaseStateInfo implements StateInfo {

        /** Internal state of the analyzer */
        private int state;

        /** Length of the pre-scan */
        private int preScan;

        public int getState() {
            return state;
        }

        public void setState(int state) {
            this.state = state;
        }

        public int getPreScan() {
            return preScan;
        }

        public void setPreScan(int preScan) {
            this.preScan = preScan;
        }

        /** Describe this state info, using the given syntax
        * to turn the state number into a name.
        */
        public String toString(Syntax syntax) {
            String stateName = syntax.getStateName(getState());
            int scanned = getPreScan();
            return "state=" + stateName + ", preScan=" + scanned; // NOI18N
        }

    }

}

/*
 * Log
 *  30   Gandalf   1.29        1/13/00  Miloslav Metelka 
 *  29   Gandalf   1.28        1/7/00   Miloslav Metelka 
 *  28   Gandalf   1.27        1/6/00   Miloslav Metelka 
 *  27   Gandalf   1.26        1/4/00   Miloslav Metelka 
 *  26   Gandalf   1.25        12/28/99 Miloslav Metelka 
 *  25   Gandalf   1.24        10/23/99 Ian Formanek    NO SEMANTIC CHANGE - Sun
 *       Microsystems Copyright in File Comment
 *  24   Gandalf   1.23        9/16/99  Miloslav Metelka 
 *  23   Gandalf   1.22        9/15/99  Miloslav Metelka 
 *  22   Gandalf   1.21        9/10/99  Miloslav Metelka 
 *  21   Gandalf   1.20        8/27/99  Miloslav Metelka 
 *  20   Gandalf   1.19        8/17/99  Miloslav Metelka 
 *  19   Gandalf   1.18        7/26/99  Miloslav Metelka 
 *  18   Gandalf   1.17        7/20/99  Miloslav Metelka 
 *  17   Gandalf   1.16        7/2/99   Miloslav Metelka 
 *  16   Gandalf   1.15        6/22/99  Miloslav Metelka 
 *  15   Gandalf   1.14        6/8/99   Miloslav Metelka 
 *  14   Gandalf   1.13        6/1/99   Miloslav Metelka 
 *  13   Gandalf   1.12        5/24/99  Miloslav Metelka 
 *  12   Gandalf   1.11        5/21/99  Miloslav Metelka endInd removed; fix
 *  11   Gandalf   1.10        5/15/99  Miloslav Metelka fixes
 *  10   Gandalf   1.9         5/13/99  Miloslav Metelka 
 *  9    Gandalf   1.8         5/5/99   Miloslav Metelka 
 *  8    Gandalf   1.7         4/23/99  Miloslav Metelka Undo added and internal 
 *       improvements
 *  7    Gandalf   1.6         3/30/99  Miloslav Metelka 
 *  6    Gandalf   1.5         3/27/99  Miloslav Metelka 
 *  5    Gandalf   1.4         3/23/99  Miloslav Metelka 
 *  4    Gandalf   1.3         3/18/99  Miloslav Metelka 
 *  3    Gandalf   1.2         2/13/99  Miloslav Metelka 
 *  2    Gandalf   1.1         2/9/99   Miloslav Metelka 
 *  1    Gandalf   1.0         2/3/99   Miloslav Metelka 
 * $
 */